Load packages:
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Contains CO2 data:
# Read the historic CO2 measurements (columns: year, co2, source) into a tibble.
historic_co2 <- read_csv(file = "data/historice_co2.csv")
## Parsed with column specification:
## cols(
## year = col_double(),
## co2 = col_double(),
## source = col_character()
## )
# Line chart of CO2 by year, with one colored line per data source.
co2_plot <- historic_co2 %>%
  ggplot(mapping = aes(x = year, y = co2, color = source)) +
  geom_line()

# Evaluating the stored plot at top level renders it.
co2_plot
Using a different package (plotly), we can make this interactive with one (!!) line of code!
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Turn the static ggplot object into an interactive plotly widget.
ggplotly(p = co2_plot)
# Read the murder data (columns: state, abb, region, population, total).
murders <- read_csv(file = "data/murders.csv")
## Parsed with column specification:
## cols(
## state = col_character(),
## abb = col_character(),
## region = col_character(),
## population = col_double(),
## total = col_double()
## )
Contains murder totals and population for each US state:
Overall average
# Mean of the `total` column over all rows, returned as a one-row tibble.
summarize(murders, mean_total = mean(total))
## # A tibble: 1 x 1
## mean_total
## <dbl>
## 1 184.
Any state differences?
# Mean of `total` within each state.
summarize(
  group_by(murders, state),
  mean_total = mean(total)
)
## # A tibble: 51 x 2
## state mean_total
## <chr> <dbl>
## 1 alabama 135
## 2 alaska 19
## 3 arizona 232
## 4 arkansas 93
## 5 california 1257
## 6 colorado 65
## 7 connecticut 97
## 8 delaware 38
## 9 district of columbia 99
## 10 florida 669
## # … with 41 more rows
Any region differences?
# Mean of `total` within each region.
summarize(
  group_by(murders, region),
  mean_total = mean(total)
)
## # A tibble: 4 x 2
## region mean_total
## <chr> <dbl>
## 1 North Central 152.
## 2 Northeast 163.
## 3 South 247.
## 4 West 147
We should probably look at this per capita instead (murders per 1,000 residents):
# Per region, average each row's murders per 1,000 residents.
# The summary column keeps the name `mean_total` even though it holds a rate.
murders %>%
  group_by(region) %>%
  summarize(mean_total = mean(total / population * 1000))
## # A tibble: 4 x 2
## region mean_total
## <chr> <dbl>
## 1 North Central 0.0218
## 2 Northeast 0.0185
## 3 South 0.0442
## 4 West 0.0183
South seems higher. But any difference between Northeast and North Central?
Let’s create a pretty map. Need some state data first:
# State outline polygons as a data frame that ggplot2 can draw.
states_map <- map_data(map = "state")
Create the map, and save it for later use:
# Choropleth of murders per 1,000 residents. Rows of `murders` are matched to
# the map polygons through `map_id = state`. The plot is stored for reuse.
murders_by_state <- ggplot(data = murders, mapping = aes(map_id = state)) +
  geom_map(
    mapping = aes(fill = total / population * 1000),
    map = states_map
  ) +
  # Stretch the axes to cover the full extent of the map outlines.
  expand_limits(x = states_map$long, y = states_map$lat) +
  coord_map()
Show the map:
# Evaluating the stored plot object at top level renders the map.
murders_by_state
Interactive:
# Interactive plotly version of the stored state map.
ggplotly(p = murders_by_state)
Read data:
# Read the funding data (columns: discipline, gender, applications, awards).
research_funding_rates <- read_csv(file = "data/research_funding_rates.csv")
## Parsed with column specification:
## cols(
## discipline = col_character(),
## gender = col_character(),
## applications = col_double(),
## awards = col_double()
## )
Overall success rates:
# Pool applications and awards over all disciplines for each gender, then
# derive the overall success rate from those pooled totals.
research_funding_rates %>%
  group_by(gender) %>%
  summarize(
    total_applications = sum(applications),
    total_awards = sum(awards),
    success_rate = total_awards / total_applications
  )
## # A tibble: 2 x 4
## gender total_applications total_awards success_rate
## <chr> <dbl> <dbl> <dbl>
## 1 men 1635 290 0.177
## 2 women 1188 177 0.149
By discipline:
# Success rate (awards / applications) for every discipline-gender row.
mutate(research_funding_rates, success_rate = awards / applications)
## # A tibble: 18 x 5
## discipline gender applications awards success_rate
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Chemical sciences men 83 22 0.265
## 2 Chemical sciences women 39 10 0.256
## 3 Physical sciences men 135 26 0.193
## 4 Physical sciences women 39 9 0.231
## 5 Physics men 67 18 0.269
## 6 Physics women 9 2 0.222
## 7 Humanities men 230 33 0.143
## 8 Humanities women 166 32 0.193
## 9 Technical sciences men 189 30 0.159
## 10 Technical sciences women 62 13 0.210
## 11 Interdisciplinary men 105 12 0.114
## 12 Interdisciplinary women 78 17 0.218
## 13 Earth/life sciences men 156 38 0.244
## 14 Earth/life sciences women 126 18 0.143
## 15 Social sciences men 425 65 0.153
## 16 Social sciences women 409 47 0.115
## 17 Medical sciences men 245 46 0.188
## 18 Medical sciences women 260 29 0.112
Bar chart:
# Per-row success rate (awards divided by applications), kept for plotting.
success_rates <- research_funding_rates %>%
  mutate(success_rate = awards / applications)

# Side-by-side bars per discipline, one bar per gender. geom_col() draws the
# y values as given, which is what geom_bar(stat = "identity") was doing.
ggplot(
  data = success_rates,
  mapping = aes(x = discipline, y = success_rate, fill = gender)
) +
  geom_col(position = position_dodge()) +
  # Tilt the x-axis labels, anchored at their left edge.
  theme(axis.text.x = element_text(angle = -45, hjust = 0))
library(leaflet)
# Minimal interactive map: an empty leaflet widget plus the default tile layer.
addTiles(leaflet())
# Satellite-imagery map with three permanently open popups, initially
# centered on the first popup's coordinates.
leaflet() %>%
  setView(lng = -89.405032, lat = 43.074805, zoom = 20) %>%
  addProviderTiles(
    provider = "Esri.WorldImagery",
    options = tileOptions(minZoom = 2, noWrap = TRUE)
  ) %>%
  addPopups(
    # lng, lat and popup are parallel vectors: element i describes popup i.
    lng = c(-89.405032, -89.407260, 11.324524),
    lat = c(43.074805, 43.074241, 55.257877),
    popup = c("We're here.", "MSC", "Denmark!"),
    # No close button and no close-on-click, so the popups stay visible.
    options = popupOptions(closeButton = FALSE, closeOnClick = FALSE)
  )